Load in the necessary libraries

library(readxl)
library(ggplot2)
library(DT)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(stringr)
library(pheatmap)
library(rjson)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.6.2
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite:
## Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
## ! pheatmap() has been masked by ComplexHeatmap::pheatmap(). 90% of the arguments
##    in the original pheatmap() are identically supported in the new function. You 
##    can still use the original function by explicitly calling pheatmap::pheatmap().
## 
## Attaching package: 'ComplexHeatmap'
## The following object is masked from 'package:pheatmap':
## 
##     pheatmap
library(GO.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
## 
##     rename
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:base':
## 
##     expand.grid
## 
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
## 
##     desc
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
library(KEGGREST)
library(KEGG.db)
## 
## KEGG.db contains mappings based on older data because the original
##   resource was removed from the the public domain before the most
##   recent update was produced. This package should now be considered
##   deprecated and future versions of Bioconductor may not have it
##   available.  Users who want more current data are encouraged to look
##   at the KEGGREST or reactome.db packages
## Warning: Package 'KEGG.db' is deprecated and will be removed from Bioconductor
##   version 3.12
library(fgsea)
library(org.Hs.eg.db)
## 
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate

Exploring the sample metadata via the study manifest

finding samples that did not get aligned on JHPCE for some reason

manifest <- read_excel("/media/theron/My_Passport/Valsamo/manifest.xlsx")
files_to_remove <- c("hg19MTERCC-ensembl75-genes-Q21777-Plate-1-E06_L65",
"hg19MTERCC-ensembl75-genes-Q21777-Plate-1-F12_L1.D707_508",
"hg19MTERCC-ensembl75-genes-Q23152+B4+H2+AG710464_L1.D705")
manifest <- manifest %>% dplyr::filter(!(Sample %in% files_to_remove))
manifest$Sample <- str_replace_all(manifest$Sample,"hg19MTERCC-ensembl75-genes-","")
fastq_files <- read.table("/media/theron/My_Passport/Valsamo/fastq_files.txt")
SJ_files <- read.table("/media/theron/My_Passport/Valsamo/SJ_files.txt")
SJ_files$sample_name <- vapply(SJ_files$V1,function(file){
  str_remove(file,"SJ.out.tab")
},character(1))
fastq_files$sample_name <- vapply(fastq_files$V1,function(file){
  str_remove(file,"_1.clipped.fastq.gz")
},character(1))
missing_files <- data.frame(sprintf("/dcs04/fertig/data/theron/share/%s",fastq_files[which(!(fastq_files$sample_name %in% SJ_files$sample_name)),"V1"]))
write.table(missing_files,file="/media/theron/My_Passport/Valsamo/missing_fasta.txt",
            quote=F, col.names = F, row.names = F, sep = "\t")

Looking at response rates across treatment group

RESIST 1.1 Progression Terms form: https://project.eortc.org/recist/wp-content/uploads/sites/4/2015/03/RECISTGuidelines.pdf

CR = Complete Response
PD = Progressive Disease
PR = Partial Response
SD = Stable Disease
NE = ?

ggplot(manifest,aes(x=TRTGRP,y=log2(PFS),fill=TRTGRP))+geom_violin()+geom_point(position = position_jitter(seed = 1, width = 0.2))

ggplot(manifest,aes(x=TRTGRP,y=log2(OS),fill=TRTGRP))+geom_violin()+geom_point(position = position_jitter(seed = 1, width = 0.2))

ggplot(manifest,aes(x=TRTGRP,fill=AX_BOR))+geom_bar(position='dodge')

ggplot(manifest,aes(x=TRTGRP,fill=AX_BOR3))+geom_bar(position='dodge')

Leafcutter Analysis Preparation

Comparisons:
a) NIV3.NAIVE(PR) vs NIV3.NAIVE(PD)
b) NIV3.NAIVE(CR) vs NIV3.NAIVE(PD)
c) NIV3.NAIVE(SD) vs NIV3.NAIVE(PD)
d) NIV3.PROG(PR) vs NIV3.PROG(PD)
e) NIV3.PROG(CR) vs NIV3.PROG(PD)
f) NIV3.PROG(SD) vs NIV3.PROG(PD)
g) NIV1.IPI3(PR) vs NIV1.IPI3(PD)
h) NIV1.IPI3(SD) vs NIV3.PROG(PD)
i) NIV1.IPI3(PR) vs NIV3.PROG(PD)
j) NIV1.IPI3(SD) vs NIV3.PROG(PD)
k) NIV1.IPI3(SD) vs NIV3.NAIVE(PD)
l) NIV1.IPI3(PR) vs NIV3.NAIVE(PD)
m) NIV1.IPI3(SD) vs NIV3.NAIVE(PD)
n) NIV3.PROG(CR) vs NIV3.PROG(SD)
o) NIV3.PROG(PR) vs NIV3.PROG(SD)
p) NIV3.PROG(CR) vs NIV3.PROG(PR)
q) NIV3.NAIVE(CR) vs NIV3.NAIVE(SD)
r) NIV3.NAIVE(PR) vs NIV3.NAIVE(SD)
s) NIV3.NAIVE(CR) vs NIV3.NAIVE(PR)

Forming groups_file.txt and junc_file.txt for each comparison

NIV3_NAIVE <- manifest[which(manifest$TRTGRP == "NIV3-NAIVE"),]
NIV3_PROG <- manifest[which(manifest$TRTGRP == "NIV3-PROG"),]
NIV1_IPI3 <- manifest[which(manifest$TRTGRP %in% c("NIV1+IPI3 P2","NIV1+IPI3 P3")),]
groups_and_junc_dir <- "/media/theron/My_Passport/Valsamo/leafcutter_prep"
JHPCE_dir <- "/dcs04/fertig/data/theron/share/juncs"
comparisons <- list()

NIV3.NAIVE(PR) vs NIV3.NAIVE(PD)

tar<-"PR"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a


for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.NAIVE(CR) vs NIV3.NAIVE(PD)

tar<-"CR"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.NAIVE(SD) vs NIV3.NAIVE(PD)

tar<-"SD"
comp<-"PD"
tar_samp<-"NIV3_NAIVE"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.PROG(PR) vs NIV3.PROG(PD)

tar<-"PR"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.PROG(CR) vs NIV3.PROG(PD)

tar<-"CR"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.PROG(SD) vs NIV3.PROG(PD)

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV3_PROG"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV1.IPI3(PR) vs NIV1.IPI3(PD)

tar<-"PR"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"NIV1_IPI3"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV1.IPI3(SD) vs NIV3.PROG(PD)

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV1.IPI3(PR) vs NIV3.PROG(PD)

tar<-"PR"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV1.IPI3(SD) vs NIV3.PROG(PD)

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV1.IPI3(SD) vs NIV3.NAIVE(PD)

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV1.IPI3(PR) vs NIV3.NAIVE(PD)

tar<-"PR"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV1.IPI3(SD) vs NIV3.NAIVE(PD)

tar<-"SD"
comp<-"PD"
tar_samp <- "NIV1_IPI3"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV1_IPI3$Sample[NIV1_IPI3$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.PROG(CR) vs NIV3.PROG(SD)

tar<-"CR"
comp<-"SD"
tar_samp <- "NIV3_PROG"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.PROG(PR) vs NIV3.PROG(SD)

tar<-"PR"
comp<-"SD"
tar_samp <- "NIV3_PROG"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.PROG(CR) vs NIV3.PROG(PR)

tar<-"CR"
comp<-"PR"
tar_samp <- "NIV3_PROG"
comp_samp<-"NIV3_PROG"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == tar]
comparators<-NIV3_PROG$Sample[NIV3_PROG$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.NAIVE(CR) vs NIV3.NAIVE(SD)

tar<-"CR"
comp<-"SD"
tar_samp <- "NIV3_NAIVE"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.NAIVE(PR) vs NIV3.NAIVE(SD)

tar<-"PR"
comp<-"SD"
tar_samp <- "NIV3_NAIVE"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

NIV3.NAIVE(CR) vs NIV3.NAIVE(PR)

tar<-"CR"
comp<-"PR"
tar_samp <- "NIV3_NAIVE"
comp_samp<-"NIV3_NAIVE"
comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
targets<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == tar]
comparators<-NIV3_NAIVE$Sample[NIV3_NAIVE$AX_BOR == comp]
a<-list()
a[["targets"]] <- targets
a[["comparators"]] <- comparators
comparisons[[comparison]] <- a

for (target in targets){
  target_junc_file <- sprintf("%s/%s_juncs_%s.txt",groups_and_junc_dir,target,comparison)
  file_contents <- sprintf("%s.filt.junc",c(target,comparators))
  file_contents <- data.frame(file_contents)
  write.table(file_contents,target_junc_file,
            sep="\t",
            quote=F,
            col.names=F,
            row.names=F)
}

saveRDS(comparisons,file="/media/theron/My_Passport/Valsamo/analysis/leafcutterMD/comparisons.rds")

Prepping comparisons for splicemutr transcript formation

comparison_junctions <- list()
leafcutter_files <- read.table("/media/theron/My_Passport/Valsamo/analysis/leafcutterMD/filenames.txt")
for (i in seq(nrow(leafcutter_files))){
  print(i)
  outlier_file <- leafcutter_files[i,]
  outlier_juncs <- read.table(outlier_file)
  outlier_junc_cols <- str_replace_all(colnames(outlier_juncs),".filt","")
  colnames(outlier_juncs) <- str_replace_all(outlier_junc_cols,"[-_+.]","_")
  file_split <- strsplit(outlier_file,"_juncs_")[[1]]
  sample <- basename(file_split[1])
  sample <- substr(sample,1,nchar(sample))
  sample<-str_replace_all(sample,"[-_+.]","_")
  arm_info <- strsplit(file_split[2],"_")[[1]]
  tar_samp <- sprintf("%s_%s",arm_info[1],arm_info[2])
  comp_samp <- sprintf("%s_%s",arm_info[3],arm_info[4])
  tar<-arm_info[5]
  comp<-arm_info[6]
  comparison <- sprintf("%s_%s_%s_%s",tar_samp,comp_samp,tar,comp)
  sig_juncs <- rownames(outlier_juncs)[as.numeric(outlier_juncs[,sample]) <=0.05]
  comparison_junctions[[comparison]] <- unique(c(comparison_junctions[[comparison]],sig_juncs))
}
comparison_juncs_linear <- data.frame(t(vapply(unname(unlist(comparison_junctions)),function(junc){
  junc_vals<-strsplit(junc,":")[[1]]
  strand<-strsplit(junc_vals[4],"_")[[1]][3]
  c(junc_vals[1],junc_vals[2],junc_vals[3],strand)
},character(4))))
rownames(comparison_juncs_linear)<-seq(nrow(comparison_juncs_linear))
colnames(comparison_juncs_linear)<-c("chr","start","end","strand")

iter<-1
total_juncs <- nrow(comparison_juncs_linear)
for (i in seq(1,total_juncs,10000)){
  if (i+9999 >= total_juncs){
    end = total_juncs
  } else {
    end = i+9999
  }
  comparison_juncs_small <- comparison_juncs_linear[seq(i,end),]
  saveRDS(comparison_juncs_small,
          sprintf("/media/theron/My_Passport/Valsamo/analysis/leafcutterMD/comparison_juncs_linear_%d.rds",iter))
  iter<-iter+1
}


saveRDS(comparison_juncs_linear,file="/media/theron/My_Passport/Valsamo/analysis/leafcutterMD/comparison_juncs_linear.rds")
saveRDS(comparison_junctions,file="/media/theron/My_Passport/Valsamo/analysis/leafcutterMD/comparison_juncs.rds")

Looking at junction overlap between comparison groups

comparison_junctions <- readRDS("/media/theron/My_Passport/Valsamo/analysis/leafcutterMD/comparison_juncs.rds")

for (i in seq(length(comparison_junctions))){
    comparison_junctions[[i]]<-vapply(comparison_junctions[[i]],function(junc){
      junc_vals<-strsplit(junc,":")[[1]]
      strand<-strsplit(junc_vals[4],"_")[[1]][3]
      sprintf("%s:%s:%s:%s",junc_vals[1],junc_vals[2],junc_vals[3],strand)
    },character(1))
}

for (i in seq(length(comparison_junctions))){
  comparison_junctions[[i]]<-unique(comparison_junctions[[i]])
}
comp_juncs <- lapply(seq(length(comparison_junctions)),function(val){
  current<-comparison_junctions[[val]]
  comparisons<-vapply(seq(length(comparison_junctions)),function(comp_val){
    length(which(comparison_junctions[[comp_val]] %in% current))/length(comparison_junctions[[comp_val]])
  },as.numeric(1))
})
comp_juncs<-as.data.frame(matrix(unlist(comp_juncs),byrow=T,nrow=length(comp_juncs)))
colnames(comp_juncs)<-names(comparison_junctions)
rownames(comp_juncs)<-names(comparison_junctions)

pheatmap::pheatmap(comp_juncs,cluster_cols=F,cluster_rows = F)
pheatmap::pheatmap(comp_juncs,cluster_cols=F,cluster_rows = T)

Compiling the genotype information per sample

genotype_files <- read.table("/media/theron/My_Passport/Valsamo/genotypes/filenames.txt")
genotypes<-lapply(seq(nrow(genotype_files)),function(row_val){
  spec_genotype_dir <- genotype_files[row_val,]
  file <- sprintf("%s/%s",spec_genotype_dir,list.files(spec_genotype_dir,"*genotype.json"))
  spec_genotype_json <- fromJSON(file=file)
  spec_genotype_json <- unname(unlist(spec_genotype_json))
  vapply(spec_genotype_json,function(allele){
    allele<-str_split(allele,":")[[1]]
    allele<-paste(allele[seq(2)],collapse=":")
    sprintf("HLA-%s",str_replace(allele,"[*]",""))
  },character(1))
})
names(genotypes) <- vapply(seq(nrow(genotype_files)),function(row_val){
  spec_genotype_dir <- genotype_files[row_val,]
  sample <- str_replace(basename(spec_genotype_dir),"Aligned.out.bam.sorted_dir","")
},character(1))

unique_alleles <- unique(unname(unlist(genotypes)))
class <- str_detect(unique_alleles,"HLA-A") | str_detect(unique_alleles,"HLA-B") | str_detect(unique_alleles,"HLA-C")
class_1_alleles <- data.frame(unique_alleles[class])
class_2_alleles <- data.frame(unique_alleles[!class])

write.table(class_1_alleles,
            file="/media/theron/My_Passport/Valsamo/genotypes/class_1_alleles.txt",
            quote=F, col.names = F, row.names = F, sep = "\t")
write.table(class_2_alleles,
            file="/media/theron/My_Passport/Valsamo/genotypes/class_2_alleles.txt",
            quote=F, col.names = F, row.names = F, sep = "\t")
saveRDS(genotypes,file="/media/theron/My_Passport/Valsamo/genotypes/genotypes.rds")

Creating specific junction files per comparison with respect to data_splicemutr

comparison_junctions <- readRDS("/media/theron/My_Passport/Valsamo/analysis/leafcutterMD/comparison_juncs.rds")
data_splicemutr <- read.table("/media/theron/My_Passport/Valsamo/analysis/splicemutr_output/data_splicemutr.txt",header=T)
data_splicemutr$juncs <- sprintf("%s:%s:%s:%s",data_splicemutr$chr,data_splicemutr$start,data_splicemutr$end,data_splicemutr$strand)
data_splicemutr$orig_row <- seq(nrow(data_splicemutr))-1
genotypes <- readRDS(file="/media/theron/My_Passport/Valsamo/genotypes/genotypes.rds")

NIV3_PROG_NIV3_PROG_SD_PD

comparisons <- names(comparison_junctions)
for (comparison in comparisons){
  output_loc <- "/media/theron/My_Passport/Valsamo/analysis/splicemutr_output"
  juncs_str <- comparison_junctions[[comparison]]
  juncs <- data.frame(matrix(unlist(strsplit(juncs_str,"[:]")),
                                            byrow=T,
                                            nrow=length(juncs_str)))
  colnames(juncs) <- c("chr","start","end","cluster")
  juncs$strand <- vapply(juncs$cluster,function(clu){strsplit(clu,"_")[[1]][3]},character(1))
  juncs$juncs <- sprintf("%s:%s:%s:%s",juncs$chr,juncs$start,juncs$end,juncs$strand)
  data_splicemutr_specific <- data_splicemutr[which(data_splicemutr$juncs %in% juncs$juncs),]
  
  specific_splice_file <- sprintf("%s/%s_data_splicemutr.rds",output_loc,comparison)
  
  saveRDS(data_splicemutr_specific,file=specific_splice_file)
}

Cibersort data

cibersort_data <- read.table("/media/theron/My_Passport/Valsamo/analysis/cibersort/CIBERSORTx_Job3_Results.txt",header=T)
man_samps<-manifest$Sample
man_ax_bor <- manifest$AX_BOR
manifest_dat <- data.frame(man_samps,man_ax_bor)
colnames(manifest_dat)<-c("samples","type_ind")
rownames(manifest_dat)<-manifest_dat$samples
cibersort_col_data<-data.frame(man_samps,man_ax_bor)
colnames(cibersort_col_data)<-c("samples","type_ind")
rownames(cibersort_col_data)<-cibersort_col_data$samples
cibersort_col_data<-cibersort_col_data[cibersort_data$Mixture,]
cibersort_data <- cibersort_data %>% dplyr::filter(P.value <= 0.05)
cibersort_col_data <- cibersort_col_data[cibersort_data$Mixture,]
cib_order <- order(cibersort_col_data$type_ind)
cibersort_col_data<-cibersort_col_data[cib_order,]
cibersort_data<-cibersort_data[cib_order,]

Heatmap(scale(as.matrix(t(cibersort_data[,seq(2,ncol(cibersort_data)-4)]))),
        top_annotation = HeatmapAnnotation(type=cibersort_col_data$type_ind),
        show_row_names=T,
        show_column_names = F,
        cluster_rows=T,
        cluster_columns=F)

Heatmap(scale(as.matrix(t(cibersort_data[,seq(2,ncol(cibersort_data)-4)]))),
        top_annotation = HeatmapAnnotation(type=cibersort_col_data$type_ind),
        show_row_names=T,
        show_column_names = F,
        cluster_rows=T,
        cluster_columns=T)

Looking at gene metrics per comparison

Mean processing

all_diff_exp<-readRDS(file="/media/theron/My_Passport/Valsamo/analysis/Deseq2/diff_expr.rds")

gene_metric_files <- "/media/theron/My_Passport/Valsamo/analysis/splicemutr_output/gene_metric_files_mean.txt"
gene_metric_comp<-read.table(gene_metric_files,header=F)
total_genes<-c()
all_comps <- list()
samples <- c()
comparisons <- c()
for (comp in gene_metric_comp$V1){
  comp_vals <- readRDS(comp)
  comp <- str_replace(basename(comp),"_gene_metric_mean.rds","")
  genes <- rownames(comp_vals)
  total_genes<-unique(c(total_genes,genes))
  all_comps[[comp]] <- comp_vals
  samples <- c(samples,colnames(comp_vals))
  comparisons <- c(comparisons,rep(comp,ncol(comp_vals)))
  
}

Mean comparisons all

comp_vals_all <- data.frame(total_genes)
rownames(comp_vals_all)<-total_genes
for (comp in names(all_comps)){
  comp_vals <- all_comps[[comp]]
  comp_vals_all[rownames(comp_vals),sprintf("%s_%d",colnames(comp_vals),which(names(all_comps)==comp))]<-comp_vals
}
comp_vals_all <- comp_vals_all[,seq(2,ncol(comp_vals_all))]

col_data <- data.frame(samples,comparisons)
col_data$type <- vapply(col_data$comparisons,function(comp){
  substr(comp,nchar(comp)-4,nchar(comp))
},character(1))
col_data$treatment <- vapply(col_data$comparisons,function(comp){
  a<-strsplit(comp,"_")[[1]]
  a<-paste(a[seq(4)],collapse="_")
},character(1))
col_data$type_ind <- manifest_dat[col_data$samples,"type_ind"]

comp_vals_all[is.na(comp_vals_all)]<-0
comp_vals_all <- comp_vals_all[apply(comp_vals_all,1,sd)!=0,]

comps <- unique(col_data$type_ind)
comps_dat<-data.frame(vapply(comps,function(comp){
  apply(comp_vals_all[,col_data$type_ind==comp],1,mean)
},numeric(nrow(comp_vals_all))))
comps_dat$expr_order <- vapply(seq(nrow(comps_dat)),function(row_val){
  a<-as.numeric(comps_dat[row_val,])
  paste(comps[order(a,decreasing=T)],collapse=">")
},character(1))
comps_rows <- nrow(comps_dat)
comps_dat_linear <- data.frame(c(comps_dat$PR,comps_dat$PD,comps_dat$SD,comps_dat$CR),
                               c(rep(comps[1],comps_rows),
                                 rep(comps[2],comps_rows),
                                 rep(comps[3],comps_rows),
                                 rep(comps[4],comps_rows)))
colnames(comps_dat_linear)<-c("splice_mut","response")

# Estimate a linear regression model: x is the group number
cf <- coef(lm(log2(splice_mut+1)~as.numeric(factor(response)), data=comps_dat_linear))

my_comparisons <- list( c(comps[1],comps[2] ), 
                        c(comps[1], comps[3]), 
                        c(comps[1], comps[4]), 
                        c(comps[2], comps[3]),
                        c(comps[2], comps[4]),
                        c(comps[3], comps[4]))
ggplot(comps_dat_linear,aes(x=response,y=log2(splice_mut+1),group=response,fill=response))+
  geom_violin()+
  stat_compare_means(comparisons = my_comparisons,method = "wilcox.test")+
  stat_summary(aes(group=1),fun=mean, geom="point", color="red", size=1) +
 geom_abline(slope=cf[2], intercept=cf[1], lwd=.5)

ggplot(comps_dat,aes(x=expr_order))+
  geom_bar(aes(y=(..count..)/sum(..count..)))+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Heatmap(as.matrix(comp_vals_all),
        top_annotation = HeatmapAnnotation(type=col_data$type,type_ind=col_data$type_ind, treatment=col_data$treatment),
        show_row_names=F,
        show_column_names = F,
        cluster_rows=T,
        cluster_columns=T)
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

comp_vals_all <- comp_vals_all[apply(comp_vals_all,1,sd)!=0,]
Heatmap(t(apply(as.matrix(comp_vals_all),1,scale)),
    top_annotation = HeatmapAnnotation(type=col_data$type,type_ind=col_data$type_ind, treatment=col_data$treatment),
    show_row_names=F,
    show_column_names = F,
    cluster_rows=T,
    cluster_columns=T)
## The automatically generated colors map from the minus and plus 99^th of
## the absolute values in the matrix. There are outliers in the matrix
## whose patterns might be hidden by this color mapping. You can manually
## set the color to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Mean comparisons no progressive disease

keeps<-which(col_data$type_ind!="PD")
col_data<-col_data[keeps,]
comp_vals_all <- comp_vals_all[,keeps]

comps <- unique(col_data$type_ind)
comps_dat<-data.frame(vapply(comps,function(comp){
  apply(comp_vals_all[,col_data$type_ind==comp],1,mean)
},numeric(nrow(comp_vals_all))))
comps_rows <- nrow(comps_dat)
comps_dat$expr_order <- vapply(seq(nrow(comps_dat)),function(row_val){
  a<-as.numeric(comps_dat[row_val,])
  paste(comps[order(a,decreasing=T)],collapse=">")
},character(1))
comps_rows <- nrow(comps_dat)
comps_dat_linear <- data.frame(c(comps_dat$PR,comps_dat$SD,comps_dat$CR),
                               c(rep(comps[1],comps_rows),
                                 rep(comps[2],comps_rows),
                                 rep(comps[3],comps_rows)))
colnames(comps_dat_linear)<-c("splice_mut","response")

# Estimate a linear regression model: x is the group number
cf <- coef(lm(log2(splice_mut+1)~as.numeric(factor(response)), data=comps_dat_linear))

my_comparisons <- list( c(comps[1],comps[2]), 
                        c(comps[1], comps[3]), 
                        c(comps[2], comps[3]))
ggplot(comps_dat_linear,aes(x=response,y=log2(splice_mut+1),group=response,fill=response))+
  geom_violin()+
  stat_compare_means(comparisons = my_comparisons)+
  stat_summary(aes(group=1),fun=mean, geom="point", color="red", size=1) +
 geom_abline(slope=cf[2], intercept=cf[1], lwd=.5)

ggplot(comps_dat,aes(x=expr_order,fill=expr_order))+
  geom_bar(aes(y=(..count..)/sum(..count..)))+labs(x="expression order",y="gene counts")

comp_vals_all[is.na(comp_vals_all)]<-0
Heatmap(as.matrix(comp_vals_all),
        top_annotation = HeatmapAnnotation(type=col_data$type,type_ind=col_data$type_ind, treatment=col_data$treatment),
        show_row_names=F,
        show_column_names = F,
        cluster_rows=T,
        cluster_columns=T)
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Heatmap(t(apply(as.matrix(comp_vals_all),1,scale)),
    top_annotation = HeatmapAnnotation(type=col_data$type,type_ind=col_data$type_ind, treatment=col_data$treatment),
    show_row_names=F,
    show_column_names = F,
    cluster_rows=T,
    cluster_columns=T)
## The automatically generated colors map from the minus and plus 99^th of
## the absolute values in the matrix. There are outliers in the matrix
## whose patterns might be hidden by this color mapping. You can manually
## set the color to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Max processing

all_diff_exp<-readRDS(file="/media/theron/My_Passport/Valsamo/analysis/Deseq2/diff_expr.rds")

gene_metric_files <- "/media/theron/My_Passport/Valsamo/analysis/splicemutr_output/gene_metric_files_max.txt"
gene_metric_comp<-read.table(gene_metric_files,header=F)
total_genes<-c()
all_comps <- list()
samples <- c()
comparisons <- c()
for (comp in gene_metric_comp$V1){
  comp_vals <- readRDS(comp)
  comp <- str_replace(basename(comp),"_gene_metric_max.rds","")
  genes <- rownames(comp_vals)
  total_genes<-unique(c(total_genes,genes))
  all_comps[[comp]] <- comp_vals
  samples <- c(samples,colnames(comp_vals))
  comparisons <- c(comparisons,rep(comp,ncol(comp_vals)))
  
}

Max comparisons all

comp_vals_all <- data.frame(total_genes)
rownames(comp_vals_all)<-total_genes
for (comp in names(all_comps)){
  comp_vals <- all_comps[[comp]]
  comp_vals_all[rownames(comp_vals),sprintf("%s_%d",colnames(comp_vals),which(names(all_comps)==comp))]<-comp_vals
}
comp_vals_all <- comp_vals_all[,seq(2,ncol(comp_vals_all))]

col_data <- data.frame(samples,comparisons)
col_data$type <- vapply(col_data$comparisons,function(comp){
  substr(comp,nchar(comp)-4,nchar(comp))
},character(1))
col_data$treatment <- vapply(col_data$comparisons,function(comp){
  a<-strsplit(comp,"_")[[1]]
  a<-paste(a[seq(4)],collapse="_")
},character(1))
col_data$type_ind <- manifest_dat[col_data$samples,"type_ind"]

comp_vals_all[is.na(comp_vals_all)]<-0
comp_vals_all <- comp_vals_all[apply(comp_vals_all,1,sd)!=0,]

comps <- unique(col_data$type_ind)
comps_dat<-data.frame(vapply(comps,function(comp){
  apply(comp_vals_all[,col_data$type_ind==comp],1,mean)
},numeric(nrow(comp_vals_all))))
comps_dat$expr_order <- vapply(seq(nrow(comps_dat)),function(row_val){
  a<-as.numeric(comps_dat[row_val,])
  paste(comps[order(a,decreasing=T)],collapse=">")
},character(1))
comps_rows <- nrow(comps_dat)
comps_dat_linear <- data.frame(c(comps_dat$PR,comps_dat$PD,comps_dat$SD,comps_dat$CR),
                               c(rep(comps[1],comps_rows),
                                 rep(comps[2],comps_rows),
                                 rep(comps[3],comps_rows),
                                 rep(comps[4],comps_rows)))
colnames(comps_dat_linear)<-c("splice_mut","response")

# Estimate a linear regression model: x is the group number
cf <- coef(lm(log2(splice_mut+1)~as.numeric(factor(response)), data=comps_dat_linear))

my_comparisons <- list( c(comps[1],comps[2] ), 
                        c(comps[1], comps[3]), 
                        c(comps[1], comps[4]), 
                        c(comps[2], comps[3]),
                        c(comps[2], comps[4]),
                        c(comps[3], comps[4]))
ggplot(comps_dat_linear,aes(x=response,y=log2(splice_mut+1),group=response,fill=response))+
  geom_violin()+
  stat_compare_means(comparisons = my_comparisons,method = "wilcox.test")+
  stat_summary(aes(group=1),fun=mean, geom="point", color="red", size=1) +
 geom_abline(slope=cf[2], intercept=cf[1], lwd=.5)

ggplot(comps_dat,aes(x=expr_order))+
  geom_bar(aes(y=(..count..)/sum(..count..)))+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
  scale_y_log10()

Heatmap(as.matrix(comp_vals_all),
        top_annotation = HeatmapAnnotation(type=col_data$type,type_ind=col_data$type_ind, treatment=col_data$treatment),
        show_row_names=F,
        show_column_names = F,
        cluster_rows=T,
        cluster_columns=T)
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

comp_vals_all <- comp_vals_all[apply(comp_vals_all,1,sd)!=0,]
Heatmap(t(apply(as.matrix(comp_vals_all),1,scale)),
    top_annotation = HeatmapAnnotation(type=col_data$type,type_ind=col_data$type_ind, treatment=col_data$treatment),
    show_row_names=F,
    show_column_names = F,
    cluster_rows=T,
    cluster_columns=T)
## The automatically generated colors map from the minus and plus 99^th of
## the absolute values in the matrix. There are outliers in the matrix
## whose patterns might be hidden by this color mapping. You can manually
## set the color to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Max comparisons no progressive disease

keeps<-which(col_data$type_ind!="PD")
col_data<-col_data[keeps,]
comp_vals_all <- comp_vals_all[,keeps]

comps <- unique(col_data$type_ind)
comps_dat<-data.frame(vapply(comps,function(comp){
  apply(comp_vals_all[,col_data$type_ind==comp],1,mean)
},numeric(nrow(comp_vals_all))))
comps_rows <- nrow(comps_dat)
comps_dat$expr_order <- vapply(seq(nrow(comps_dat)),function(row_val){
  a<-as.numeric(comps_dat[row_val,])
  paste(comps[order(a,decreasing=T)],collapse=">")
},character(1))
comps_rows <- nrow(comps_dat)
comps_dat_linear <- data.frame(c(comps_dat$PR,comps_dat$SD,comps_dat$CR),
                               c(rep(comps[1],comps_rows),
                                 rep(comps[2],comps_rows),
                                 rep(comps[3],comps_rows)))
colnames(comps_dat_linear)<-c("splice_mut","response")

# Estimate a linear regression model: x is the group number
cf <- coef(lm(log2(splice_mut+1)~as.numeric(factor(response)), data=comps_dat_linear))

my_comparisons <- list( c(comps[1],comps[2]), 
                        c(comps[1], comps[3]), 
                        c(comps[2], comps[3]))
ggplot(comps_dat_linear,aes(x=response,y=log2(splice_mut+1),group=response,fill=response))+
  geom_violin()+
  stat_compare_means(comparisons = my_comparisons)+
  stat_summary(aes(group=1),fun=mean, geom="point", color="red", size=1) +
 geom_abline(slope=cf[2], intercept=cf[1], lwd=.5)

ggplot(comps_dat,aes(x=expr_order,fill=expr_order))+
  geom_bar(aes(y=(..count..)/sum(..count..)))+labs(x="expression order",y="gene counts")

comp_vals_all[is.na(comp_vals_all)]<-0
Heatmap(as.matrix(comp_vals_all),
        top_annotation = HeatmapAnnotation(type=col_data$type,type_ind=col_data$type_ind, treatment=col_data$treatment),
        show_row_names=F,
        show_column_names = F,
        cluster_rows=T,
        cluster_columns=T)
## The automatically generated colors map from the 1^st and 99^th of the
## values in the matrix. There are outliers in the matrix whose patterns
## might be hidden by this color mapping. You can manually set the color
## to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Heatmap(t(apply(as.matrix(comp_vals_all),1,scale)),
    top_annotation = HeatmapAnnotation(type=col_data$type,type_ind=col_data$type_ind, treatment=col_data$treatment),
    show_row_names=F,
    show_column_names = F,
    cluster_rows=T,
    cluster_columns=T)
## The automatically generated colors map from the minus and plus 99^th of
## the absolute values in the matrix. There are outliers in the matrix
## whose patterns might be hidden by this color mapping. You can manually
## set the color to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.
## `use_raster` is automatically set to TRUE for a matrix with more than
## 2000 rows. You can control `use_raster` argument by explicitly setting
## TRUE/FALSE to it.
## 
## Set `ht_opt$message = FALSE` to turn off this message.

Differential expression analysis using Deseq2

comparison_file <- "/media/theron/My_Passport/Valsamo/analysis/leafcutterMD/comparisons.rds"
comparisons <- readRDS(comparison_file)
gene_expression_file <- "/media/theron/My_Passport/Valsamo/featurecounts_all.rds"
gene_expression <- readRDS(gene_expression_file)
all_diff_exp<-list()
for (comp in names(comparisons)){
  comp_val <- comparisons[[comp]]
  targets <- comp_val$targets
  comparators <- comp_val$comparators
  samples <- colnames(gene_expression)
  target_samples<-samples[which(samples %in% targets)]
  comparator_samples<-samples[which(samples %in% comparators)]
  gene_expression_deseq2 <- gene_expression[,c(target_samples,comparator_samples)]
  coldata <- data.frame(c(target_samples,comparator_samples))
  colnames(coldata)<-"samples"
  rownames(coldata)<-coldata$samples
  coldata$condition <- c(rep("target",length(target_samples)),rep("comparator",length(comparator_samples)))  
  dds <- DESeqDataSetFromMatrix(countData = as.matrix(gene_expression_deseq2),
                            colData = coldata,
                            design = ~ condition)
  dds <- DESeq(dds)
  res <- data.frame(results(dds))
  all_diff_exp[[comp]]<-res
}
saveRDS(all_diff_exp,file="/media/theron/My_Passport/Valsamo/analysis/Deseq2/diff_expr.rds")

gsea results on GO and KEGG terms

reading in fgsea results

fgsea_file <- "/media/theron/My_Passport/Valsamo/analysis/fgsea/fgsea_all.rds"
fgsea_data <- readRDS(fgsea_file)
comparisons <- names(fgsea_data)
comp <- comparisons[1]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[2]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[3]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[4]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[5]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[6]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[7]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[8]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[8]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[9]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[10]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[11]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[12]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[13]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[14]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[15]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[16]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))
comp <- comparisons[17]
fgsea_comp <- fgsea_data[[comp]]
GO_table<-fgsea_comp$GO
KEGG_table<-fgsea_comp$KEGG
datatable(GO_table,caption=sprintf("%s_GO",comp))
datatable(KEGG_table,caption=sprintf("%s_KEGG",comp))